/* 
Cleans representatives of national union central boards 
		// input: Dirigentes_CentraisSindicais_final; National_boards_centrals; firstnames_boards_0921_g
		// output: firstnames_boards_0921; National_boards_centrals_names; National_boards_centrals_collapsed
*/ 

// Start a fresh log for this script. Both commands are wrapped in capture so
// that a missing open log (or an unavailable log path) does not stop the run.
capture log close
capture log using "$logs/clean_national_directorates", replace

// Import every sheet of the workbook into its own .dta file in a temporary
// folder, then append all of them into a single dataset.
// xls2dta is a user-written command and must be installed beforehand.
capture mkdir "$raw/LAI/temp"
xls2dta , save("$raw/LAI/temp") allsheets : import excel using "$raw/LAI/Dirigentes_CentraisSindicais_final.xlsx", firstrow
xls2dta , save("$files/National_boards_centrals.dta" , replace) : append
use "$files/National_boards_centrals.dta", clear

// H is presumably a stray spreadsheet column picked up by the import -- TODO confirm
drop H
// keep only rows that carry a union-central identifier (CNPJ is numeric here)
drop if CNPJ==.

// Remove the temporary folder. "rmdir /s /q" only exists on Windows, so use
// the equivalent recursive delete on macOS/Unix.
if c(os) == "Windows" {
	shell rmdir "$raw/LAI/temp" /s /q
}
else {
	shell rm -rf "$raw/LAI/temp"
}

// Clean role titles (FUNÇÃO)

// Strip ASCII blanks on both sides, then any trailing Unicode whitespace
// (e.g. non-breaking spaces coming from the spreadsheet).
replace FUNÇÃO = ustrrtrim(strtrim(FUNÇÃO))

// Harmonise typos and spelling variants; each line matches one exact raw value.
replace FUNÇÃO = "Diretor" if FUNÇÃO == "Direto"
replace FUNÇÃO = "Membro da Diretoria Colegiada" if FUNÇÃO == "Membro de Diretoria Colegiada"
replace FUNÇÃO = "Membro da Diretoria Colegiada" if FUNÇÃO == "Membros da Diretoria  Colgiada"
replace FUNÇÃO = "Membro do Conselho Fiscal" if FUNÇÃO == "Membros do Conselho Fiscal"
replace FUNÇÃO = "Secretário Geral" if FUNÇÃO == "Secretrário Geral"
replace FUNÇÃO = "Suplente de Diretoria" if FUNÇÃO == "Suplente de Diretoria Colegiada"
replace FUNÇÃO = "Vice-Presidente" if FUNÇÃO == "Vice Presidente"

// Lower-case everything so later comparisons ("presidente", "vice-presidente")
// are case-insensitive. ustrlower() is used instead of strlower() because the
// latter only converts ASCII letters and would leave accented capitals
// (Á, Ç, Ã, ...) in upper case.
replace FUNÇÃO = ustrlower(FUNÇÃO)
// Split the mandate start/end dates into month and year components.
// Two separate loops keep the variable order: start_month, start_year,
// end_month, end_year.
foreach part in month year {
	generate start_`part' = `part'(INÍCIOMANDATO)
}
foreach part in month year {
	generate end_`part' = `part'(FIMMANDATO)
}

// First name: break the full name on blanks into n_1, n_2, ... and keep the
// first token under the name "name" (used below as the gender-merge key).
split DIRIGENTESSINDICAIS, parse(" ") generate(n_)
rename n_1 name
// Gender lookup: export the distinct first names, then load the
// name-to-gender file into a temporary dataset for the merge that follows.
preserve
	// one row per distinct first name, written out as the input for genderBR
	keep name
	duplicates drop
	export delimited using "$files/firstnames_boards_0921.csv", replace

	/*
	Running genderBR on firstnames_boards_0921.csv generates
	"$files/genderBR/firstnames_boards_0921_g.csv". That file was then edited
	by hand to reduce the number of board members with missing gender
	information, which is why the edited file is already provided in the
	replication package.

	To regenerate the file as it was before the manual edits, move the next
	three lines out of this comment block and run them:

	local infile "$files/firstnames_boards_0921.csv"
	local outfile "$files/genderBR/firstnames_boards_0921_g.csv"
	shell "$RPATH" "$CODEPATH/get_gender_fromnames.R" "`infile'" "`outfile'"

	*/

	// temporary dataset that will hold the lookup table
	tempfile gender
	import delimited "$files/genderBR/firstnames_boards_0921_g.csv", varnames(1) clear
	// v1 is the unnamed first column of the CSV (presumably R's row index -- TODO confirm)
	drop v1
	save `gender'
restore
// Attach the gender classification to every board member by first name.
merge m:1 name using `gender'
// diagnostics: names without a lookup entry (1) and lookup entries without a member (2)
tab name if _merge == 1
tab name if _merge == 2
// lookup-only rows carry no board member, so they are discarded
drop if _merge == 2

// Manual gender assignments for names the lookup did not classify
// (accented names, rare names and misspellings present in the raw data).
replace gender = "Male" if ///
	inlist(name, "ANTÔNIO", "DAMÁZIO", "JOSÉ", "JOÃO", "JOÉO", "LOURENÇO", "NÉLIO") | ///
	inlist(name, "SEBASTIÃO", "SENASTIÃO", "SÉRGIO", "ATNÁGORAS")
replace gender = "Female" if inlist(name, "KÁTIA", "MÔNICA", "VALÉRIA")
replace gender = "Male" if ///
	inlist(name, "ALDIERIO", "AMAURINHO", "FABIAN", "ADENIR", "ADIR", "ALVACIR", "CLAUDECIR") | ///
	inlist(name, "DARY", "ELIMAR", "ELY", "GEDIR", "HELY", "IVIS", "JOCENIR", "LINDOMAR") | ///
	inlist(name, "NILSONJOSE", "PEREIRA", "REGINOVALDO", "RIZONILSON", "SAMMER", "TEREZINHO") | ///
	inlist(name, "VALDECI", "VALDERLI", "ZIVAN")
replace gender = "Female" if ///
	inlist(name, "ESTENIZA", "IZABELIZA", "JUVANDIA", "EBLIN", "GLADIR", "IVANIR") | ///
	inlist(name, "JACY", "NADIR", "ONEIDE", "ORILDES", "REBEHECA")
replace gender = "Male" if ///
	inlist(name, "AIRES", "ARLETES", "ATNAGORAS", "CLAIR", "CLARCKSON", "DEFENDENTE") | ///
	inlist(name, "EDILEZ", "EVERANDIR", "IWRARU", "JACHON", "JACV", "JASSEIR", "JOGELSON", "JURACI") | ///
	inlist(name, "LELCIDES", "MANACESS", "NEUCIR", "NILSELENO", "NINDBERG", "OZEMARIO") | ///
	inlist(name, "SHAKESPEARE", "SIDELVAR", "TODSON", "UYRAMIR", "VANDERLI", "VILOARVALHOBALDO", "WALZENIR", "WENNDER")
replace gender = "Female" if ///
	inlist(name, "ANNYELI", "DIANYEIRE", "ELGIANE", "HILDINETE", "IDIARINA", "IDJARRINA", "ILANDE") | ///
	inlist(name, "IDJAWALA", "J", "JUSSEINETE", "MAGNOVANDA", "NEIVANILDA", "PINHILDINETE", "ROSARIO", "SILMONICA", "TAMAQUARA")
drop _merge
tab gender, m

// Female indicator: 1 = female, 0 = male, missing when gender is unknown.
// Unknown covers both the literal "NA" from the lookup file and an empty
// string (names that found no match in the lookup); without the second
// condition unmatched names would silently be coded as male.
gen female = (gender == "Female") if !inlist(gender, "NA", "")

// Save the person-level dataset; the remaining name tokens are no longer needed.
drop n_*
save "$files/National_boards_centrals_names", replace

/* BUILD THE UNION-CENTRAL PANEL (ONE ROW PER CENTRAL AND YEAR) */
use "$files/National_boards_centrals_names", clear

// Indicators for the two top roles and for a woman holding each of them.
// A missing female value yields 0 in female_P / female_VP.
generate is_president = (FUNÇÃO=="presidente")
generate is_VP = (FUNÇÃO=="vice-presidente")
generate female_P = (female == 1 & is_president ==1)
generate female_VP = (female == 1 & is_VP ==1)
// constant used to count board members in the collapse below
generate count = 1
// numeric panel identifier built from the central's name
encode CENTRAL, generate(central)
drop CENTRAL

// One row per central and mandate: board size, share of women, and whether
// a (female) president / vice-president is present.
collapse (sum) count (mean) female (max) female_P female_VP is_president is_VP, by(CNPJ central start_year end_year)

// Declare the panel and create a row for every central in every year of the
// overall start_year range.
tsset central start_year
tsfill, full

// Carry the previous year's values forward into the rows added by tsfill.
// end_year is deliberately not part of this list, as in the original code.
foreach v in count female female_P female_VP is_president is_VP CNPJ {
	replace `v' = l.`v' if `v'==.
}
// Rows that are still empty lie before a central's first recorded board.
drop if count == .

// Put the identifiers first and save the panel.
order central CNPJ start_year, first
save "$files/National_boards_centrals_collapsed", replace

capture log close
